Import the expense data from the CSV file `XMasExpenses.csv`.
p_load(readr)
p_load(stringi)
# Load the expense records into `expenses`; readr reports the column types
# it guessed (Total, Food, Gifts and Elect are all parsed as doubles).
expenses <- read_csv(file = "XMasExpenses.csv")
## Parsed with column specification:
## cols(
## Total = col_double(),
## Food = col_double(),
## Gifts = col_double(),
## Elect = col_double()
## )
# Preview the first six rows, rounded to whole numbers for readability.
head(round(expenses, digits = 0), n = 6L)
## # A tibble: 6 x 4
## Total Food Gifts Elect
## <dbl> <dbl> <dbl> <dbl>
## 1 668 33 252 96
## 2 263 27 128 104
## 3 419 39 201 85
## 4 273 37 219 97
## 5 191 25 154 84
## 6 262 40 225 73
A few summary plots and numerical descriptives follow.
# Per-column numerical summary (quartiles, mean, and NA counts where present).
summary(object = expenses)
## Total Food Gifts Elect
## Min. : 0.0 Min. : 0.00 Min. : 0.0 Min. : 0.00
## 1st Qu.:265.6 1st Qu.:24.76 1st Qu.:178.5 1st Qu.: 74.85
## Median :335.8 Median :30.13 Median :215.3 Median : 88.89
## Mean :363.3 Mean :30.09 Mean :215.2 Mean : 88.60
## 3rd Qu.:452.9 3rd Qu.:35.34 3rd Qu.:252.3 3rd Qu.:102.42
## Max. :788.0 Max. :57.76 Max. :398.3 Max. :166.46
## NA's :1 NA's :12
p_load(lattice)
# Scatterplot matrix of every pair of columns.
pairs(expenses)

# Box-and-whisker plots of the Total and Elect columns.
bwplot(~ Total, data = expenses)
bwplot(~ Elect, data = expenses)

# Total plotted against Food.
xyplot(Total ~ Food, data = expenses)
We now fit a few models.
# Model 1: Total regressed on Elect alone, followed by a residuals-vs-fitted
# plot. The fitted lm object is passed as `data`, so `residuals` and
# `fitted.values` are looked up among its components.
expenses.lm.Elect <- lm(formula = Total ~ Elect, data = expenses)
xyplot(residuals ~ fitted.values, data = expenses.lm.Elect, aspect = 1)

# Model 2: Food added as a second predictor; same diagnostic plot.
expenses.lm.ElectFood <- lm(formula = Total ~ Elect + Food, data = expenses)
xyplot(residuals ~ fitted.values, data = expenses.lm.ElectFood, aspect = 1)

# Model 3: all three predictors (Elect, Food, Gifts), with the coefficient
# table and fit statistics printed.
expenses.lm.full <- lm(formula = Total ~ Elect + Food + Gifts, data = expenses)
summary(expenses.lm.full)
##
## Call:
## lm(formula = Total ~ Elect + Food + Gifts, data = expenses)
##
## Residuals:
## Min 1Q Median 3Q Max
## -207.50 -86.88 -22.61 43.10 327.16
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -288.93241 6.99335 -41.315 < 2e-16 ***
## Elect 4.23863 0.04590 92.355 < 2e-16 ***
## Food 1.03324 0.13045 7.921 2.48e-15 ***
## Gifts 1.14150 0.02152 53.054 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 110 on 20425 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.3111, Adjusted R-squared: 0.311
## F-statistic: 3075 on 3 and 20425 DF, p-value: < 2.2e-16
# Residuals of the full model against the Gifts predictor.
# lm() omits rows with missing values, so the fit's residuals are shorter
# than expenses$Gifts and not aligned with it row-for-row. Both variables are
# therefore taken from the fit itself: the stored residuals and the model
# frame, which holds exactly the rows that were used.
xyplot(
  expenses.lm.full$residuals ~ Gifts,
  data = model.frame(expenses.lm.full),
  ylab = "residuals"
)

# Residuals against fitted values for the full model; small green points
# reduce overplotting. Both variables are components of the lm object.
xyplot(
  residuals ~ fitted.values,
  data = expenses.lm.full,
  aspect = 1,
  cex = 0.25,
  col = "green"
)